# coding:utf-8
import requests
import re
import yaml
import os
from configparser import ConfigParser

class YuqueExport():
    """Copy every Yuque knowledge base of the token's user into MrDoc.

    For each Yuque repo a MrDoc project is created, then the repo's documents
    are created inside it (optionally following the repo's table of contents
    so that parent/child relations are kept). Images referenced by a document
    are re-hosted through MrDoc's ``upload_img_url`` endpoint.

    Every constructor argument is optional. An argument left as ``None`` falls
    back to the module-level setting of the same purpose (``YUQUE_URL``,
    ``YUQUE_TOKEN``, ``MRDOC_URL``, ``MRDOC_TOKEN``, ``YUQUE_TOC``,
    ``editor_mode``), so ``YuqueExport()`` keeps working for the script entry
    point while the class can also be configured explicitly.
    """

    # Markdown image: ![alt](target) -- the group captures the target.
    _MD_IMG_RE = re.compile(r"!\[.*?\]\((.*?)\)")
    # HTML image tag, self-closing or not.
    _HTML_IMG_RE = re.compile(r"<img[^>]*>", re.IGNORECASE)
    # src attribute of an <img> tag, single- or double-quoted.
    _HTML_SRC_RE = re.compile(r"""\ssrc\s*=\s*["']([^"']+)["']""", re.IGNORECASE)

    def __init__(self, yuque_url=None, yuque_token=None, mrdoc_url=None,
                 mrdoc_token=None, by_toc=None, mode=None):
        """Store connection settings.

        Args:
            yuque_url: Base URL of the Yuque API.
            yuque_token: Yuque user token, sent as ``X-Auth-Token``.
            mrdoc_url: Base URL of the MrDoc site (no trailing slash).
            mrdoc_token: MrDoc user token, sent as the ``token`` query value.
            by_toc: Truthy to import following each repo's table of contents.
            mode: Value sent to MrDoc as ``editor_mode``; ``1`` selects the
                Markdown body of a Yuque doc, anything else the HTML body.
        """
        self.base_url = YUQUE_URL if yuque_url is None else yuque_url
        self.yuque_token = YUQUE_TOKEN if yuque_token is None else yuque_token
        self.mrdoc_url = MRDOC_URL if mrdoc_url is None else mrdoc_url
        self.mrdoc_token = MRDOC_TOKEN if mrdoc_token is None else mrdoc_token
        self.by_toc = YUQUE_TOC if by_toc is None else by_toc
        self.editor_mode = editor_mode if mode is None else mode
        self.headers = {
            "User-Agent": "yuque_export",
            "X-Auth-Token": self.yuque_token
        }
        # Yuque TOC uuid -> MrDoc document id, used to resolve parent docs.
        self.doc_uuid_map = {}

    def get_user_info(self):
        """Fetch the token owner's profile and cache login, id and name.

        Raises:
            ValueError: if Yuque does not answer with HTTP 200.
        """
        res_obj = requests.get(url=self.base_url + '/user', headers=self.headers)
        if res_obj.status_code != 200:
            raise ValueError("Token 信息错误")
        user = res_obj.json()['data']
        self.login_id = user['login']
        self.uid = user['id']
        self.username = user['name']
        print("=========== 用户信息初始化成功 ==========")

    # List the user's knowledge bases and import each one.
    def get_repos(self):
        """Create a MrDoc project per Yuque repo and import its documents.

        Requires ``get_user_info()`` to have been called first, because it
        reads ``self.login_id``.

        Raises:
            RuntimeError: if MrDoc reports that a project was not created.
        """
        repos_json = requests.get(
            self.base_url + '/users/' + self.login_id + '/repos',
            headers=self.headers,
        ).json()
        for repo in repos_json['data']:
            rid = repo['id']
            name = repo['name']
            print(">>>发现知识库：", rid, name)
            project_created = self.create_project(info={
                "name": name,
                "desc": repo['description'],
                "role": 1
            })
            if not project_created['status']:
                # A bare string cannot be raised in Python 3; use a real exception.
                raise RuntimeError("[x]新建文集异常:{}".format(project_created))
            print("+已新建文集：", name)
            project_id = project_created['data']

            docs = self.get_repo_docs(rid=rid)
            if self.by_toc:
                self._import_docs_by_toc(rid, project_id, docs)
            else:
                self._import_all_docs(rid, project_id, docs)

    def _import_docs_by_toc(self, rid, project_id, docs):
        """Import *docs* in table-of-contents order, keeping parent links."""
        docs_by_id = {doc['id']: doc for doc in docs}
        for toc in self.get_repo_toc(rid=rid) or []:
            toc_type = toc['type']
            if toc_type == 'META':
                continue
            parent_uuid = toc.get('parent_uuid') or ''
            # Unknown parents fall back to the top level instead of raising.
            parent_doc = self.doc_uuid_map.get(parent_uuid, 0) if parent_uuid else 0

            doc = docs_by_id.get(toc.get('id')) if toc_type == 'DOC' else None
            if doc is None:
                # Not a document (or no matching document): nothing to import.
                # Map the node to its own parent so that its children attach
                # to the nearest ancestor that was actually imported.
                print(">>>跳过目录节点（无对应文档）：", toc.get('title'))
                self.doc_uuid_map[toc['uuid']] = parent_doc
                continue

            print(">>>发现文档信息：", doc['id'], doc['title'], doc['slug'], doc['format'])
            # get_repo_doc() already re-hosts the images; do not convert twice.
            mrdoc_doc = self.create_doc(info={
                'pid': project_id,
                'title': doc['title'],
                'doc': self.get_repo_doc(rid=rid, slug=doc['slug']),
                'editor_mode': self.editor_mode,
                'parent_doc': parent_doc
            })
            if mrdoc_doc['status']:
                self.doc_uuid_map[toc['uuid']] = mrdoc_doc['data']
                print("+已新建文档：", doc['title'], "上级文档设为：", parent_doc)
            else:
                self.doc_uuid_map[toc['uuid']] = parent_doc
                print("[x]新建文档失败：", doc['title'], mrdoc_doc)

    def _import_all_docs(self, rid, project_id, docs):
        """Import every document of *docs* as a top-level MrDoc document."""
        for doc in docs:
            print(">>>发现文档信息：", doc['id'], doc['title'], doc['slug'], doc['format'])
            # get_repo_doc() already re-hosts the images; do not convert twice.
            mrdoc_doc = self.create_doc(info={
                'pid': project_id,
                'title': doc['title'],
                'doc': self.get_repo_doc(rid=rid, slug=doc['slug']),
                'editor_mode': self.editor_mode,
            })
            if mrdoc_doc['status']:
                print("+已新建文档：", doc['title'])
            else:
                print("[x]新建文档失败：", doc['title'], mrdoc_doc)

    # Fetch the knowledge base's table of contents.
    def get_repo_toc(self, rid):
        """Return the parsed ``toc_yml`` of repo *rid* (``[]`` when empty)."""
        repo_json = requests.get(self.base_url + '/repos/' + str(rid), headers=self.headers).json()
        toc_yml = repo_json['data'].get('toc_yml')
        if not toc_yml:
            return []
        # safe_load never constructs arbitrary Python objects from the YAML.
        return yaml.safe_load(toc_yml) or []

    # Fetch the knowledge base's document list.
    def get_repo_docs(self, rid):
        """Return the ``data`` list of the repo's ``/docs`` response."""
        docs_json = requests.get(self.base_url + '/repos/' + str(rid) + '/docs', headers=self.headers).json()
        return docs_json['data']

    # Fetch one document's content.
    def get_repo_doc(self, rid, slug):
        """Return the body of document *slug* with its images re-hosted.

        The Markdown ``body`` is used when ``editor_mode`` is ``1``, the
        ``body_html`` otherwise. A missing body is treated as empty text.
        """
        doc_data = requests.get(
            self.base_url + '/repos/' + str(rid) + '/docs/' + slug,
            headers=self.headers,
        ).json()['data']
        doc_body = doc_data['body'] if self.editor_mode == 1 else doc_data['body_html']
        return self.convert_img(doc_body or "")

    # Re-host the images referenced by a document.
    def convert_img(self, doc):
        """Upload every image referenced in *doc* to MrDoc and rewrite its URL.

        Both Markdown images (``![alt](url)``, an optional title after the URL
        is ignored) and HTML ``<img>`` tags are handled. Each distinct URL is
        uploaded once; URLs whose upload does not return ``code == 0`` are
        left untouched.

        Returns:
            The document text with successfully uploaded URLs replaced.
        """
        if not doc:
            return doc

        found = []
        for target in self._MD_IMG_RE.findall(doc):
            parts = target.split(None, 1)  # drop an optional "title"
            if parts:
                found.append(parts[0])
        for tag in self._HTML_IMG_RE.findall(doc):
            src = self._HTML_SRC_RE.search(tag)
            if src:
                found.append(src.group(1))

        uploaded = {}
        for img_url in dict.fromkeys(found):  # de-duplicate, keep order
            print(">>>发现图片：", img_url)
            is_upload_img = self.upload_img(url=img_url)
            if is_upload_img.get('code') == 0:
                print("+转存图片成功：", img_url)
                uploaded[img_url] = is_upload_img['data']['url']

        # Replace longer URLs first so a URL that is a prefix of another one
        # cannot corrupt the longer match.
        for old_url in sorted(uploaded, key=len, reverse=True):
            doc = doc.replace(old_url, uploaded[old_url])
        return doc

    def _mrdoc_post(self, path, data):
        """POST *data* to the MrDoc API *path* and return the decoded JSON."""
        resp = requests.post(
            self.mrdoc_url + path,
            params={"token": self.mrdoc_token},  # URL-encoded by requests
            data=data,
        )
        return resp.json()

    # Create a MrDoc project.
    def create_project(self, info):
        """Create a MrDoc project from *info*; return MrDoc's JSON reply."""
        return self._mrdoc_post("/api/create_project/", info)

    # Create a MrDoc document.
    def create_doc(self, info):
        """Create a MrDoc document from *info*; return MrDoc's JSON reply."""
        return self._mrdoc_post("/api/create_doc/", info)

    # Upload an image by URL.
    def upload_img(self, url):
        """Ask MrDoc to fetch and store the image at *url*; return its reply."""
        return self._mrdoc_post("/api/upload_img_url/", {"url": url})

def _prompt_int(prompt):
    """Ask with *prompt* until the user types an integer, then return it."""
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            print("请仅输入整数数字")


# Script entry point: collect settings from ./config.ini (asking the user for
# anything missing), publish them as the module-level names YuqueExport reads,
# then run the import.
if __name__ == '__main__':
    # Raw string: the ASCII art is full of backslashes that would otherwise be
    # invalid escape sequences (a SyntaxWarning on recent Python versions).
    print(r"""
 __     __                       ___    __  __      _____             
 \ \   / /                      |__ \  |  \/  |    |  __ \            
  \ \_/ /   _  __ _ _   _  ___     ) | | \  / |_ __| |  | | ___   ___ 
   \   / | | |/ _` | | | |/ _ \   / /  | |\/| | '__| |  | |/ _ \ / __|
    | || |_| | (_| | |_| |  __/  / /_  | |  | | |  | |__| | (_) | (__ 
    |_| \__,_|\__, |\__,_|\___| |____| |_|  |_|_|  |_____/ \___/ \___|
                 | |                                                  
                 |_|                                                  

【语雀知识库导入到觅思文档】 v20230316
* 遍历用户所有知识库；
* 支持按知识库目录导入；
* 支持文档内的图片转存至觅思文档；
        """)
    CONFIG = ConfigParser()
    # The path is relative to the current working directory; a missing file
    # or section simply yields the fallbacks below.
    CONFIG.read(os.path.join('./config.ini'), encoding='utf-8')

    MRDOC_URL = CONFIG.get("mrdoc", "url", fallback="")
    if MRDOC_URL == "":
        MRDOC_URL = input("1)请输入你的觅思文档地址:")
    else:
        print("1)你配置的觅思文档地址为：", MRDOC_URL)
    # API paths are appended with a leading slash, so drop trailing ones.
    MRDOC_URL = MRDOC_URL.rstrip("/")

    MRDOC_TOKEN = CONFIG.get("mrdoc", "token", fallback="")
    if MRDOC_TOKEN == "":
        MRDOC_TOKEN = input("2)请输入你的觅思文档用户 Token:")
    else:
        print("2)你配置的觅思文档用户Token为:", MRDOC_TOKEN)

    YUQUE_URL = CONFIG.get('yuque', 'url', fallback="")
    if YUQUE_URL == '':
        YUQUE_URL = "https://www.yuque.com/api/v2"
    YUQUE_URL = YUQUE_URL.rstrip("/")
    # Always show the effective API URL, whether configured or defaulted.
    print("3)你配置的语雀域名为：", YUQUE_URL)

    YUQUE_TOKEN = CONFIG.get('yuque', 'token', fallback="")
    if YUQUE_TOKEN == "":
        YUQUE_TOKEN = input("4)请输入你的语雀用户 Token:")
    else:
        print("4)你配置的语雀用户Token为：", YUQUE_TOKEN)

    # Only the answer 1 enables table-of-contents import.
    YUQUE_TOC = _prompt_int("5)是否按知识库目录进行导入，1-是 2否：") == 1

    # 1 stays 1; every other answer is sent as editor mode 3.
    editor_mode = _prompt_int("6)请输入文档编辑模式，1-markdown 2-富文本：")
    if editor_mode != 1:
        editor_mode = 3

    op = YuqueExport()
    op.get_user_info()
    op.get_repos()
    # "pause" is a Windows shell built-in; elsewhere it only prints an error.
    if os.name == 'nt':
        os.system('pause')